###################################
# Generalized DiD: Crime Rates
###################################

rm(list=ls())

library(Hmisc)
library(readstata13)
library(foreign)
library(lmtest)
library(sandwich)
library(stargazer)
library(msm)
library(ggplot2)
library(did)
set.seed(1111)

#sink("12_generalized_did_rates.txt")

# functions
vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  if(M<50){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

round_df <- function(df, digits) {
  nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))
  
  df[,nums] <- round(df[,nums], digits = digits)
  
  (df)
}

############################
# Read and prepare data 
############################

# load individual data
load("final_county_2023april6.RData")
names(d_county)
nrow(d_county)

###############################
# Rates & Binary Treatment
###############################

# Haiti 
reg1 = lm(d_county$crime_rates_s ~ d_county$t_stagg_haiti + as.factor(d_county$county) + as.factor(d_county$survey))
reg1_c = round(coeftest(reg1, vcov = vcovCluster(reg1, cluster = d_county$county)),4)
round(reg1_c[1:2,],3)

# All visas 
reg2 = lm(d_county$crime_rates_s ~ d_county$t_stagg_all + as.factor(d_county$county) + as.factor(d_county$survey))
reg2_c = round(coeftest(reg2, vcov = vcovCluster(reg2, cluster = d_county$county)),4)
round(reg2_c[1:2,],3)

##################################
# Rates & Continuous Treatment
##################################

# Haiti 
reg3 = lm(d_county$crime_rates_s ~ d_county$change_haiti_s + as.factor(d_county$county) + as.factor(d_county$survey))
reg3_c = round(coeftest(reg3, vcov = vcovCluster(reg3, cluster = d_county$county)),4)
round(reg3_c[1:2,],3)

# All visas 
reg4 = lm(d_county$crime_rates_s ~ d_county$change_all_s + as.factor(d_county$county) + as.factor(d_county$survey))
reg4_c = round(coeftest(reg4, vcov = vcovCluster(reg4, cluster = d_county$county)),4)
round(reg4_c[1:2,],3)

###############################
# Tables
###############################

# Table a2
stargazer(reg3_c,reg4_c,
          digits = 3,
          title="",
          align=TRUE, 
          omit.stat=c("LL","ser","f","rsq","adj.rsq"), 
          no.space=TRUE,  
          multicolumn = TRUE,
          table.placement = "H",
          column.separate = c(2,2),
          covariate.labels=c("Visas from Haiti","Visas from all countries"),
          dep.var.caption  = "Crime Rates" ,
          dep.var.labels.include = FALSE,
          star.cutoffs = c(0.05,0.01,0.001),
          keep = c("change_haiti_s","change_all_s"),
          add.lines = list(c("Covariates","No","No"),c("Observations","1,348","1,348")),
          out="tableA2.txt")

# Table a4
stargazer(reg1_c,reg2_c,
          digits = 3,
          title="",
          align=TRUE, 
          omit.stat=c("LL","ser","f","rsq","adj.rsq"), 
          no.space=TRUE,  
          multicolumn = TRUE,
          table.placement = "H",
          column.separate = c(2,2),
          covariate.labels=c("Visas from Haiti","Visas from all countries"),
          dep.var.caption  = "Crime Rates" ,
          dep.var.labels.include = FALSE,
          star.cutoffs = c(0.05,0.01,0.001),
          keep = c("t_stagg_haiti","t_stagg_all"),
          add.lines = list(c("Covariates","No","No"),c("Observations","1,348","1,348")),
          out="tableA4.txt")

sink()
